# -*- coding: utf-8 -*-
'''
Created on 28.12.2009

@author: igor.fld
Retrieve language information (ISO 639-1 codes) from Wikipedia and load it into MySQL
'''

import urllib2 , re
import MySQLdb
from lxml import etree

# Wikipedia page whose table of ISO 639-1 codes is scraped below.
url = "http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes"

# Browser-like User-Agent sent instead of urllib2's default one.
# NOTE(review): the string lacks its closing ")"; it is left unchanged here
# because it is sent verbatim at runtime.
ua = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; WOW64; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.0.04506; .NET CLR 3.5.21022'

req = urllib2.Request(url)
req.add_header('User-Agent', ua)

parser = etree.HTMLParser()
res = urllib2.urlopen(req)
try:
    tree = etree.parse(res, parser)
finally:
    # The response is not needed once parsing has finished (or failed), so
    # release the underlying connection instead of leaking it.
    res.close()

# NOTE(review): credentials and database name are hard-coded.
con = MySQLdb.connect(user='root', passwd='root', db='bogo')
cur = con.cursor()
# Ask the server to exchange text with this connection as UTF-8.
cur.execute("""set names utf8""")

# Every <tr> that is a direct child of a table with class "wikitable sortable".
languages = tree.xpath('//table[@class="wikitable sortable"]/tr')

# Shared record that main() refills for each table row.
data = {}


def clean(data):
    """Empty the dict *data* in place.

    The dict is cleared rather than rebound: assigning a new dict to the
    parameter would only change the local name and leave the caller's dict
    (and any stale values in it) untouched.

    Returns None.
    """
    data.clear()
     
def dump(data):
    """Return the ``(abbr, name)`` pair of the record *data*.

    Both keys must be present in *data*; a missing key raises KeyError.
    """
    abbr = data['abbr']
    name = data['name']
    return abbr, name
     
def load(data):
    """Insert one language record into the ``languages`` table and commit.

    *data* must provide the keys ``'name'``, ``'abbr'`` and ``'native'``; they
    are written to the ``title``, ``abbr`` and ``nl_title`` columns in that
    order, and ``id`` is passed as NULL.  The module-level cursor ``cur`` and
    connection ``con`` are used.

    A ``MySQLdb.Error`` is reported on stdout and swallowed, so one failing
    row does not abort the caller.  Returns None.
    """
    try:
        cur.execute(
            """INSERT INTO languages(id,title,abbr,nl_title) VALUES(null,%s,%s,%s)""",
            (data['name'], data['abbr'], data['native']))
        con.commit()
    except MySQLdb.Error as e:
        # The usual shape of e.args is (error code, message), but an error
        # raised with fewer arguments or a non-numeric first one must not make
        # the handler itself fail, so fall back to the exception's own text.
        try:
            message = "ERROR occured %d, %s" % (e.args[0], e.args[1])
        except (IndexError, TypeError):
            message = "ERROR occured %s" % (e,)
        print(message)
     
def getelm(item):
    """Return the first element of *item* when it is a list.

    An empty list yields None.  Anything that is not a list is returned
    unchanged.
    """
    if not isinstance(item, list):
        return item
    return item[0] if item else None

          
def main():
    """Store every row of the module-level ``languages`` list in the database.

    For each row the shared ``data`` dict is reset and refilled with the text
    of the first cell (``abbr``), the link text of the fifth cell (``name``)
    and the text of the sixth cell (``native``).  The ``(abbr, name)`` pair is
    echoed to stdout, and the record is passed to load() unless it has no
    ``abbr``.
    """
    for row in languages:
        clean(data)
        data['abbr'] = getelm(row.xpath('./td[1]/text()'))
        data['name'] = getelm(row.xpath('./td[5]/a/text()'))
        data['native'] = getelm(row.xpath('./td[6]/text()'))
        # Single formatted argument so the output is the same whether print
        # is a statement or a function.
        print("Loading data %s" % (dump(data),))
        # Rows whose first <td> has no text (presumably the header row, to be
        # confirmed against the page) give no abbr and are not stored.
        if data['abbr'] is not None:
            load(data)

# Call main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
     main()
          